import torch
from transformers import AutoModel, AutoModelForSequenceClassification

# Environment probe: report how many CUDA devices PyTorch can see, then the
# GPU memory currently allocated, scaled by 1024 ** 2 and labelled "MB".
visible_gpus = torch.cuda.device_count()
print(visible_gpus)
allocated_scaled = torch.cuda.memory_allocated() / 1024 ** 2  # check GPU memory usage
print(allocated_scaled, "MB")

# Load the checkpoint stored in the "TinyBERT_General_4L_312D" subfolder of the
# local model directory. 8-bit loading (load_in_8bit=True) is left disabled.
model = AutoModel.from_pretrained(
    "D:/ai/huggingface-models",
    subfolder="TinyBERT_General_4L_312D",
)

# Show the dtype of the first encoder layer's query projection weight
# (the author recorded the output as torch.float32).
first_layer_query = model.encoder.layer[0].attention.self.query
print(first_layer_query.weight.dtype)
